﻿/*  
cd /projects/hsieh_project/proj_201809/code_1_data/
qsas data_1_agg_sum.sas 5 &
*/

/* Libref for the project data directory. It is not referenced again in this
   file; presumably the included reader uses it -- confirm. */
libname hr "/projects/hsieh_project/proj_201809/data/";

/* Output directory used by the PROC EXPORT steps at the end of this file.
   The value already ends in "/" and the export paths append "/" again, so
   the resolved paths contain "//". */
%Let dir_out = /projects/hsieh_project/proj_201809/data/;


/*---------------------------------------------------------*/
/* Load Raw Data */
/*---------------------------------------------------------*/

/* c_keep_year is declared global and set before the include below.
   Presumably data_0_read_raw.sas inserts it as an optional year filter
   (the commented-out alternative is a subsetting IF) -- confirm against
   that file. Here it is set to a quoted blank. */
%Global c_keep_year;
/*%Let c_keep_year=%quote(if year in (1977,1987,1997,2007,2013););*/
%Let c_keep_year=%quote( );
/* SOURCE2 echoes the included statements to the log. The rest of this file
   uses data set LBD and macro %cal_emp_perc, neither of which is defined
   here; presumably both come from this include -- confirm. */
%include "/projects/hsieh_project/proj_201809/code_1_data/data_0_read_raw.sas" /source2;

/*---------------------------------------------------------*/
/* Statistics at the Aggregate Level */
/*---------------------------------------------------------*/

/*============================================================================*/
/* Preparation */

/* Data set at year-industry-firm level */

/* Collapse LBD to one row per year x firm x industry (ch_ind) x indf_perc.
   WORKER, EST and SALARY are summed into emp_aggfi, est_aggfi, pay_aggfi. */
/* NOTE(review): pay_aggfi is summed from SALARY here, whereas the firm-level
   total pay_aggf computed later in this file is summed from PAY. Confirm
   that both variables exist in LBD and that the difference is intended. */

/* Sorted copy of LBD; LBD itself is left untouched by this step. */
proc sort data=lbd out=dt_in;
  by year firmnum ch_ind indf_perc;
run;

/* One output row per BY group; only the keys and the three sums are kept
   (the automatic _TYPE_ and _FREQ_ columns are discarded). */
proc summary data=dt_in;
  by year firmnum ch_ind indf_perc;
  var worker est salary;
  output out=dt_in(keep=year firmnum ch_ind indf_perc
                        emp_aggfi est_aggfi pay_aggfi)
         sum=emp_aggfi est_aggfi pay_aggfi;
run;

/* Firm's percentile, emp, est, pay in aggregate economy */

/* Create runif to use when ranking aggregate firms */

/* Rank firms within each year. %cal_emp_perc is defined outside this file;
   later steps read AGGF_PERC after merging LBD_AGGF, so presumably the macro
   writes that variable (var_out=aggf) -- confirm against its definition. */
%cal_emp_perc(ds_in=lbd, ds_out=lbd_aggf, var_by=%bquote(year), var_unit=firmnum, var_out=aggf, var_runif=runif_2);

/* Order the ranking output for the BY-merges further down. */
proc sort data=lbd_aggf;
  by year firmnum;
run;

/* LBD is sorted in place so it can be summarised with a BY statement. */
proc sort data=lbd;
  by year firmnum;
run;

/* Firm totals per year: WORKER, EST and PAY summed into emp_aggf, est_aggf
   and pay_aggf. Only the keys and the three sums are kept. */
proc summary data=lbd;
  by year firmnum;
  var worker est pay;
  output out=lbd_firm(keep=year firmnum emp_aggf est_aggf pay_aggf)
         sum=emp_aggf est_aggf pay_aggf;
run;

/* Firm's HHI */

/* Both inputs must be ordered by year and firm for the match-merge. */
proc sort data=dt_in;
  by year firmnum ch_ind;
run;

proc sort data=lbd_firm;
  by year firmnum;
run;

/* Attach the firm total to every firm-industry row and compute the squared
   employment share of that industry within the firm. */
data dt_hhi;
  merge dt_in lbd_firm;
  by year firmnum;
  hhi_aggf = (emp_aggfi / emp_aggf)**2;
run;

/* Sum the squared shares within firm-year: one HHI value per firm and year.
   The automatic _TYPE_ and _FREQ_ columns are not kept. */
proc summary data=dt_hhi;
  by year firmnum;
  var hhi_aggf;
  output out=dt_hhi(drop=_type_ _freq_) sum=hhi_aggf;
run;

/* Merge data sets */

/* Three successive match-merges on (year, firmnum) widen DT_IN, which has
   several rows per firm-year, with firm-level columns. No IN= filter is
   used, so a firm-year present in only one input still produces a row.
   The merges are kept as separate steps: with several rows per BY group,
   a variable shared by DT_IN and the right-hand data set would be resolved
   differently in a single multi-way merge. */

/* Step 1: add the firm totals emp_aggf, est_aggf, pay_aggf. */
data dt_in;
  merge dt_in lbd_firm; /* year-ind_firm & year-firm */
  by year firmnum;
run;

/* Step 2: add the columns produced by %cal_emp_perc (LBD_AGGF). */
data dt_in;
  merge dt_in lbd_aggf; /* year-ind_firm & year-firm & agg rank */
  by year firmnum;
run;

/* Step 3: add the firm-level HHI (hhi_aggf). */
data dt_in;
  merge dt_in dt_hhi;
  by year firmnum;
run;

/*============================================================================*/
/* Aggregate statistics */

/*
  %agg_wrapper -- yearly and year x industry totals for all firms and for
  the "top" firms.

  Input (WORK):
    dt_in         firm-industry rows with year, firmnum, ch_ind, indf_perc,
                  aggf_perc, emp_/est_/pay_aggfi, emp_/est_/pay_aggf, hhi_aggf.
  Output (WORK):
    dt_agg        one row per firm-year with the firm-level indicator columns.
    dt_aggi       one row per firm-year-industry with the indicator columns.
    dto_agg       sums of the firm-level columns by year.
    dto_aggi      sums of the firm-industry columns by year.
    dto_aggi_ind  sums of the firm-industry columns by year and ch_ind.

  Parameters (both optional; the defaults reproduce the original lists):
    l_agg_perc=   blank-separated cut-offs, in percent, applied to aggf_perc.
    l_ind_perc=   blank-separated cut-offs, in percent, applied to indf_perc.

  A cut-off P selects rows with <rank variable> <= P/100. Column suffixes are
  the cut-off with the dots removed (0.1 -> _01, 100 -> _100), so two
  cut-offs that differ only by a dot would collide.
  In SAS a missing numeric value compares lower than any number, so rows
  with a missing rank variable satisfy every cut-off.

  Changes from the previous version:
    - %SCAN has no named arguments; "delimiters=%str( )" made every letter of
      the word "delimiters" and "=" a delimiter. The delimiter is now passed
      positionally as %str( ).
    - The indicator columns are generated inside a single data step per data
      set instead of two full rewrites of the data set per cut-off.
    - Working macro variables are %local.
*/
%macro agg_wrapper(l_agg_perc=0.1 0.01 0.001 100, l_ind_perc=10 5 1);

%local i_list j_list i_perc j_perc ic_perc jc_perc i_cut j_cut l_proc_var;

/*----------------------------------------------------------------------------*/
/* Create variables related only to top firms */

%let l_proc_var = emp_agg est_agg pay_agg n_agg hhi_n_aggf hhiw_n_aggf;

data dt_agg;
  set dt_in;
  by year firmnum;
  if first.firmnum; /* Keep only one obs per firm */
  emp_agg = emp_aggf;
  est_agg = est_aggf;
  pay_agg = pay_aggf;
  n_agg = 1;
  hhi_n_aggf = hhi_aggf;
  hhiw_n_aggf = hhi_aggf*emp_aggf; /* employment-weighted HHI */

  /* One set of columns per aggregate cut-off: zero unless the firm is
     inside the cut-off. */
  %let i_list = 1;
  %let i_perc = %scan(&l_agg_perc., &i_list., %str( ));
  %do %while (%length(&i_perc.) > 0);
    %let ic_perc = %sysfunc(compress(%bquote(&i_perc.), %str(.)));
    %let i_cut = %sysevalf(&i_perc. / 100);
    %put Top &i_perc.% Aggregate Firms: Code &ic_perc;

    emp_agg_&ic_perc. = 0;
    est_agg_&ic_perc. = 0;
    pay_agg_&ic_perc. = 0;
    n_agg_&ic_perc. = 0;
    hhi_n_agg_&ic_perc. = 0;
    hhiw_n_agg_&ic_perc. = 0;
    if aggf_perc <= &i_cut. then
      do;
        emp_agg_&ic_perc. = emp_aggf;
        est_agg_&ic_perc. = est_aggf;
        pay_agg_&ic_perc. = pay_aggf;
        n_agg_&ic_perc. = 1;
        hhi_n_agg_&ic_perc. = hhi_aggf;
        hhiw_n_agg_&ic_perc. = hhi_aggf*emp_aggf;
      end;

    %let l_proc_var = &l_proc_var. emp_agg_&ic_perc. est_agg_&ic_perc. pay_agg_&ic_perc. n_agg_&ic_perc. hhi_n_agg_&ic_perc. hhiw_n_agg_&ic_perc.;
    %let i_list = %eval(&i_list. + 1);
    %let i_perc = %scan(&l_agg_perc., &i_list., %str( ));
  %end;
run;

%put List of Variables for Top Firms;
%put &l_proc_var.;

/* Year */

proc sort data=dt_agg;
  by year;
run;

proc means data=dt_agg noprint;
  by year;
  output out=dto_agg(drop=_type_ _freq_) sum(&l_proc_var.)=&l_proc_var.;
run;

/*----------------------------------------------------------------------------*/
/* Create variables related to top firms and industries */

proc sort data=dt_in;
  by year firmnum ch_ind;
run;

%let l_proc_var = emp_aggi est_aggi pay_aggi n_aggi ind_aggi;

data dt_aggi;
  set dt_in;
  by year firmnum ch_ind;
  if first.ch_ind; /* Keep only one obs per firm-industry */
  emp_aggi = emp_aggfi;
  est_aggi = est_aggfi;
  pay_aggi = pay_aggfi;
  n_aggi = 1;
  ind_aggi = 1;

  %let i_list = 1;
  %let i_perc = %scan(&l_agg_perc., &i_list., %str( ));
  %do %while (%length(&i_perc.) > 0);
    %let ic_perc = %sysfunc(compress(%bquote(&i_perc.), %str(.)));
    %let i_cut = %sysevalf(&i_perc. / 100);
    %put Top &i_perc.% Aggregate Firms: Code &ic_perc;

    /* Firm is inside the aggregate cut-off. */
    emp_aggi_&ic_perc. = 0;
    est_aggi_&ic_perc. = 0;
    pay_aggi_&ic_perc. = 0;
    n_aggi_&ic_perc. = 0;
    ind_aggi_&ic_perc. = 0;
    if aggf_perc <= &i_cut. then
      do;
        emp_aggi_&ic_perc. = emp_aggfi;
        est_aggi_&ic_perc. = est_aggfi;
        pay_aggi_&ic_perc. = pay_aggfi;
        n_aggi_&ic_perc. = 1;
        ind_aggi_&ic_perc. = 1;
      end;

    %let l_proc_var = &l_proc_var. emp_aggi_&ic_perc. est_aggi_&ic_perc. pay_aggi_&ic_perc. n_aggi_&ic_perc. ind_aggi_&ic_perc.;

    %let j_list = 1;
    %let j_perc = %scan(&l_ind_perc., &j_list., %str( ));
    %do %while (%length(&j_perc.) > 0);
      %let jc_perc = %sysfunc(compress(%bquote(&j_perc.), %str(.)));
      %let j_cut = %sysevalf(&j_perc. / 100);
      %put Top &j_perc.% Industry Firms: Code &jc_perc;

      /* Firm is inside both the aggregate and the industry cut-off. */
      emp_aggi_&ic_perc._&jc_perc. = 0;
      est_aggi_&ic_perc._&jc_perc. = 0;
      pay_aggi_&ic_perc._&jc_perc. = 0;
      ind_aggi_&ic_perc._&jc_perc. = 0;
      if (aggf_perc <= &i_cut.) & (indf_perc <= &j_cut.) then
        do;
          emp_aggi_&ic_perc._&jc_perc. = emp_aggfi;
          est_aggi_&ic_perc._&jc_perc. = est_aggfi;
          pay_aggi_&ic_perc._&jc_perc. = pay_aggfi;
          ind_aggi_&ic_perc._&jc_perc. = 1;
        end;

      %let l_proc_var = &l_proc_var. emp_aggi_&ic_perc._&jc_perc. est_aggi_&ic_perc._&jc_perc. pay_aggi_&ic_perc._&jc_perc. ind_aggi_&ic_perc._&jc_perc.;
      %let j_list = %eval(&j_list. + 1);
      %let j_perc = %scan(&l_ind_perc., &j_list., %str( ));
    %end;

    %let i_list = %eval(&i_list. + 1);
    %let i_perc = %scan(&l_agg_perc., &i_list., %str( ));
  %end;
run;

%put List of Variables for Top Firms (Related to Industry);
%put &l_proc_var.;

/* Year */

proc sort data=dt_aggi;
  by year;
run;

proc means data=dt_aggi noprint;
  by year;
  output out=dto_aggi(drop=_type_ _freq_) sum(&l_proc_var.)=&l_proc_var.;
run;

/* Year-Ind */

proc sort data=dt_aggi;
  by year ch_ind;
run;

proc means data=dt_aggi noprint;
  by year ch_ind;
  output out=dto_aggi_ind(drop=_type_ _freq_) sum(&l_proc_var.)=&l_proc_var.;
run;

%mend;

/* Run the aggregation with its built-in percentile lists. It produces the
   WORK data sets dto_agg, dto_aggi and dto_aggi_ind used below. */
%agg_wrapper();

/*============================================================================*/
/* Export */

/* Yearly file: firm-level and firm-industry-level sums side by side,
   matched on year. */
data dt_out;
  merge dto_agg dto_aggi;
  by year;
run;

/* No DBMS= option is given; presumably the file type is inferred from the
   .dta extension -- confirm. REPLACE overwrites an existing file. */
proc export data=dt_out outfile="&dir_out./agg_sum_all.dta" replace;
run;


/* Year x industry file: DT_OUT is reused as the export buffer. */
data dt_out;
  set dto_aggi_ind;
run;

proc export data=dt_out outfile="&dir_out./agg_sum_ind_all.dta" replace;
run;


/* End of sas file */
